#### Ward Level Analysis ####

library(tidyverse)
library(ggpubr)
library(patchwork)
library(synthdid)
library(ggrepel)
library(xtable)
library(kableExtra)
library(knitr)
library(stargazer)
library(modelsummary)
library(purrr)
library(gsynth)
library(plotrix)
library(sandwich)
library(lmtest)

set.seed(7390)

ward_full <- read.csv('data/ward_full.csv')

#### Creating Figure 2 ####

ward_full <- ward_full %>% 
  filter(count == 5 & loc != 'Tower Hamlets') %>% 
  mutate(nbrom = ifelse(loc == 'Bromley', 1 , 0))

trnt_diff <- ward_full %>% 
  group_by(nbrom, year) %>% 
  summarise(n(), 
            mean.trnt = mean(trnt, na.rm = T)) %>% 
  mutate(bromley = ifelse(nbrom == 1, 'Bromley', 'Not Bromley'))

diff_trnt <- ward_full %>% 
  group_by(year) %>% 
  summarise(n(),
            mean.b = mean(trnt[nbrom == 1], na.rm =T), 
            mean.c = mean(trnt[nbrom == 0], na.rm = T),
            trnt.diff = round((mean.b - mean.c)*100, 2))

text_trnt_plot <- trnt_diff %>% 
  group_by(nbrom) %>%
  slice_head(n = 1) %>% 
  summarise(year = year,
            mean.trnt = mean.trnt,
            bromley = bromley)

trnt_plot <- ggplot(trnt_diff)+
  geom_point(aes(y = as.numeric(mean.trnt), x = year, 
                 colour = rev(bromley)))+
  geom_line(aes(y = as.numeric(mean.trnt), x = year,
                colour = rev(bromley)))+
  scale_x_continuous('Year',
                     labels = seq(2002, 2018, by = 4),
                     breaks = seq(2002, 2018, by = 4))+
  scale_y_continuous('Turnout', labels = scales::percent,
                     breaks = seq(0.2, 0.7, by = 0.1))+
  geom_text(data = text_trnt_plot,
            aes(x = year-0.5, # adding some offset  
                y = as.numeric(mean.trnt), 
                colour = rev(bromley), label = bromley),
            size = 3,
            hjust = 0, 
            vjust = 2)+
  geom_text(data = diff_trnt,
            aes(x = year,
                y = mean.b + 0.015, 
                label = trnt.diff),
            size = 3, 
            hjust = 'outward')+
  coord_cartesian(xlim = c(2001, 2019), ylim = c(0.25,0.72))+
  theme_minimal()+
  theme(panel.grid = element_blank(),
        legend.position = 'none')

trnt_plot

ggsave('plots/trnt_plot.png', trnt_plot, width = 15, height = 10, 
       units = 'cm')

#### Naive Difference-in-Difference ####

ward_full <- ward_full %>% 
  mutate(ever_treated = ifelse(loc == 'Bromley', 1, 0))

naive_mod <- lm(trnt ~ ever_treated*factor(year), ward_full %>% 
                  filter(trnt > 0))

summary(naive_mod)
coefs_naive <- c(coef(naive_mod))


se_wild <- sqrt(diag(vcovBS(naive_mod, cluster = ~ loc, 
                            type = 'wild', B = 1000)))
up_ci_wild <- c(coef(naive_mod) + 1.96*se_wild)
lw_ci_wild <- c(coef(naive_mod) - 1.96*se_wild)


se_wild_mammen <- sqrt(diag(vcovBS(naive_mod, cluster = ~ loc, 
                                   type = 'wild-mammen', B= 1000)))
up_ci_wild_mammen <- c(coef(naive_mod) + 1.96*se_wild_mammen)
lw_ci_wild_mammen <- c(coef(naive_mod) - 1.96*se_wild_mammen)


se_wild_webb <- sqrt(diag(vcovBS(naive_mod, cluster = ~ loc, 
                                 type = 'wild-webb', B= 1000)))
up_ci_wild_webb <- c(coef(naive_mod) + 1.96*se_wild_webb)
lw_ci_wild_webb <- c(coef(naive_mod) - 1.96*se_wild_webb)


#### Creating Figure 3 ####

wild_boot_plt <- ggplot() +
  geom_errorbar(aes( x= c('Wild', 'Wild Mammen', 'Wild Webb'),
                     ymin = c(as.vector(lw_ci_wild[10]), 
                              as.vector(lw_ci_wild_webb[10]),
                              as.vector(lw_ci_wild_webb[10])), 
                     ymax = as.vector(up_ci_wild[10]), 
                     as.vector(up_ci_wild_mammen[10]),
                     as.vector(up_ci_wild_webb[10])), 
                colour = '#F8766D',   width = 0.25, size = 1.5) +
  geom_point(aes(x = c('Wild', 'Wild Mammen', 'Wild Webb'), 
                 y = as.vector(coefs_naive[10])), colour = '#00A9ff', size = 2)+
  geom_hline(yintercept = 0, linetype = 2, colour = '#F8766D')+
  xlab('Method')+
  ylab('Estimate')+
  theme_minimal()+
  coord_cartesian(ylim = c(-0.08, 0.01))+
  theme(panel.grid = element_blank(),
        axis.text = element_text(size = 14),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14),
        axis.title = element_text(size = 14))

wild_boot_plt
ggsave('plots/wild_boot_plt.png', wild_boot_plt, 
       width = 25, height = 20, units = 'cm')


#### Interactive Fixed Effects ####

ife_non_para <- gsynth(trnt ~ treat, cl = 'loc',
                       data = ward_full, index = c('ward_code', 'year'), 
                       min.T0 = 4, estimator = 'ife', force = 'two-way', 
                       r = c(0,2), se = T, inference = 'nonparametric', 
                       seed = 7390)

ife_para <- gsynth(trnt ~ treat, cl = 'loc',
                   data = ward_full, index = c('ward_code', 'year'), 
                   min.T0 = 4, estimator = 'ife', force = 'two-way', 
                   r = c(0,2), se = T, inference = 'parametric', 
                   seed = 7390)

ife_jackknife <- gsynth(trnt ~ treat, cl = 'loc',
                        data = ward_full, index = c('ward_code', 'year'), 
                        min.T0 = 4, estimator = 'ife', force = 'two-way', 
                        r = c(0,2), se = T, inference = 'jackknife', 
                        seed = 7390)

### Point Estimates ###
ife_para$est.att[5,1] 
ife_non_para$est.att[5,1]
ife_jackknife$est.att[5,1]

### Lower Bounds ###
ife_para$est.att[5,3] 
ife_non_para$est.att[5,3]
ife_jackknife$est.att[5,3]

### Upper Bounds ###

ife_para$est.att[5,4] 
ife_non_para$est.att[5,4]
ife_jackknife$est.att[5,4]

### Creating Figure 4 ###

ife_plt <- ggplot() +
  geom_errorbar(aes(x= c('Parametric', 'Non-Parametric', 'Jackknife'),
                    ymin = c(ife_para$est.att[5,3], 
                             ife_non_para$est.att[5,3],
                             ife_jackknife$est.att[5,3])), 
                ymax = c(ife_para$est.att[5,4], 
                         ife_non_para$est.att[5,4],
                         ife_jackknife$est.att[5,4]),colour = '#F8766D', width = 0.25, size = 1.5) +
  geom_point(aes(x = c('Parametric', 'Non-Parametric', 'Jackknife'), 
                 y = c(ife_para$est.att[5,1], 
                       ife_non_para$est.att[5,1],
                       ife_jackknife$est.att[5,1])),colour = '#00A9FF', size = 2)+
  geom_hline(yintercept = 0, linetype = 2, colour = '#F8766D')+
  scale_y_continuous(labels = seq(-0.08, 0, by = 0.02),
                     breaks = seq(-0.08, 0, by = 0.02))+
  xlab('Method')+
  ylab('Estimate')+
  theme_minimal()+
  coord_cartesian(ylim = c(-0.07, 0.01))+
  theme(panel.grid = element_blank(),
        axis.text = element_text(size = 14),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14),
        axis.title = element_text(size = 14))

ife_plt

ggsave('plots/ife_plt.png', ife_plt, 
       width = 25, height = 20, units = 'cm')

#### Local Average Treatment Effects ####

soc_grad_full <- read.csv('data/social_grade.csv')
eco_act <- read.csv('data/eco_act.csv')
ethnicity <- read.csv('data/per_minority.csv')
eco_act <- left_join(eco_act, ward_full, by = 'ward_code')
full_com <- left_join(soc_grad_full, eco_act, 
                      by = 'ward_code')
full_com <- left_join(full_com, ethnicity, by = "ward_code")


### Race ###

full_com_hb <- full_com %>% # High Proportion of Black Residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_max(prop = 0.25, order_by = per_black) %>% 
  ungroup()

ife_para_hb <- gsynth(trnt ~ treat, cl = 'loc',
                      data = full_com_hb, index = c('ward_code', 'year'), 
                      min.T0 = 4, estimator = 'ife', force = 'two-way', 
                      r = c(0,2), se = T, inference = 'parametric', 
                      seed = 7390)

ife_para_hb

full_com_lb <- full_com %>% # Low proportion of black residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_min(prop = 0.25, order_by = per_black) %>% 
  ungroup()


ife_para_lb <- gsynth(trnt ~ treat, cl = 'loc',
                      data = full_com_lb, index = c('ward_code', 'year'), 
                      min.T0 = 4, estimator = 'ife', force = 'two-way', 
                      r = c(0,2), se = T, inference = 'parametric', 
                      seed = 7390)

ife_para_lb

### Class ###

full_com_de <- full_com %>% #High proportion of DE Residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_max(prop = 0.25, order_by = prop_de) %>% 
  ungroup()

ife_para_hde <- gsynth(trnt ~ treat, cl = 'loc',
                       data = full_com_de, index = c('ward_code', 'year'), 
                       min.T0 = 4, estimator = 'ife', force = 'two-way', 
                       r = c(0,2), se = T, inference = 'parametric', 
                       seed = 7390)

ife_para_hde

full_com_lde <- full_com %>% #Low proportion of DE residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_min(prop = 0.25, order_by = prop_de) %>% 
  ungroup()


ife_para_lde <- gsynth(trnt ~ treat, cl = 'loc',
                       data = full_com_lde, index = c('ward_code', 'year'), 
                       min.T0 = 4, estimator = 'ife', force = 'two-way', 
                       r = c(0,2), se = T, inference = 'parametric', 
                       seed = 7390)
ife_para_lde

### Retired ###

full_com_hret <- full_com %>% #High proportion of retired residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_max(prop = 0.25, order_by = ret) %>% 
  ungroup()

ife_para_hret <- gsynth(trnt ~ treat, cl = 'loc',
                        data = full_com_hret, index = c('ward_code', 'year'), 
                        min.T0 = 4, estimator = 'ife', force = 'two-way', 
                        r = c(0,2), se = T, inference = 'para', 
                        seed = 7390)

ife_para_hret


full_com_lret <- full_com %>% #Low proportion of retired residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_min(prop = 0.25, order_by = ret) %>% 
  ungroup()


ife_para_lret <- gsynth(trnt ~ treat, cl = 'loc',
                        data = full_com_lret, index = c('ward_code', 'year'), 
                        min.T0 = 4, estimator = 'ife', force = 'two-way', 
                        r = c(0,2), se = T, inference = 'parametric', 
                        seed = 7390)

ife_para_lret

### Employment ###

full_com_hemp <- full_com %>% #High proportion of unemployed residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_max(prop = 0.25, order_by = unemploy) %>% 
  ungroup()


ife_para_hemp <- gsynth(trnt ~ treat, cl = 'loc',
                       data = full_com_hemp, index = c('ward_code', 'year'), 
                       min.T0 = 4, estimator = 'ife', force = 'two-way', 
                       r = c(0,2), se = T, inference = 'parametric', 
                       seed = 7390)

ife_para_hemp

full_com_lemp <- full_com %>% #Low proportion of unemployed residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_min(prop = 0.25, order_by = unemploy) %>% 
  ungroup()

ife_para_lemp <- gsynth(trnt ~ treat, cl = 'loc',
                        data = full_com_lemp, index = c('ward_code', 'year'), 
                        min.T0 = 4, estimator = 'ife', force = 'two-way', 
                        r = c(0,2), se = T, inference = 'parametric', 
                        seed = 7390)

ife_para_lemp

### Disabled ###

full_com_hdis <- full_com %>% #High proportion of disabled residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_max(prop = 0.25, order_by = sick_dis) %>% 
  ungroup()


ife_para_hdis <- gsynth(trnt ~ treat, cl = 'loc',
                        data = full_com_hdis, index = c('ward_code', 'year'), 
                        min.T0 = 4, estimator = 'ife', force = 'two-way', 
                        r = c(0,2), se = T, inference = 'parametric', 
                        seed = 7390)

ife_para_hdis

full_com_ldis <- full_com %>% #Low proportion of disabled residents
  filter(!is.na(count)) %>% 
  group_by(loc) %>% 
  slice_min(prop = 0.25, order_by = sick_dis) %>% 
  ungroup()

ife_para_ldis <- gsynth(trnt ~ treat, cl = 'loc',
                        data = full_com_ldis, index = c('ward_code', 'year'), 
                        min.T0 = 4, estimator = 'ife', force = 'two-way', 
                        r = c(0,2), se = T, inference = 'parametric', 
                        seed = 7390)
ife_para_ldis

### Creating Figure 7 ###

late_estimates <- as.data.frame(c('High % of Black Residents',
                                  'Low % of Black Residents',
                                  'High % of DE Residents',
                                  'Low % of DE Residents',
                                  'High % of Retired Residents',
                                  'Low % of Retired Residents', 
                                  'High % of Unemployed Residents',
                                  'Low % of Unemployed Residents', 
                                  'High % of Sick or Disabled Residents',
                                  'Low % of Sick or Disabled Residents'))

late_estimates_para <- late_estimates %>% 
  rename(estimate = `c("High % of Black Residents", "Low % of Black Residents", "High % of DE Residents", "Low % of DE Residents", "High % of Retired Residents", "Low % of Retired Residents", "High % of Unemployed Residents", "Low % of Unemployed Residents", "High % of Sick or Disabled Residents", "Low % of Sick or Disabled Residents")`) %>% 
  mutate(point = ifelse(estimate == "High % of Black Residents", ife_para_hb$est.att[5,1],
                 ifelse(estimate == "Low % of Black Residents", ife_para_lb$est.att[5,1],
                 ifelse(estimate == "High % of DE Residents", ife_para_hde$est.att[5,1], 
                 ifelse(estimate == "Low % of DE Residents", ife_para_lde$est.att[5,1], 
                 ifelse(estimate == "High % of Retired Residents", ife_para_hret$est.att[5,1], 
                 ifelse(estimate == "Low % of Retired Residents", ife_para_lret$est.att[5,1], 
                 ifelse(estimate == "High % of Unemployed Residents", ife_para_hemp$est.att[5,1], 
                 ifelse(estimate == "Low % of Unemployed Residents", ife_para_lemp$est.att[5,1], 
                 ifelse(estimate == "High % of Sick or Disabled Residents", ife_para_hdis$est.att[5,1], 
                 ifelse(estimate == "Low % of Sick or Disabled Residents", ife_para_ldis$est.att[5,1], NA)))))))))), 
         se = ifelse(estimate == "High % of Black Residents", ife_para_hb$est.att[5,2],
              ifelse(estimate == "Low % of Black Residents", ife_para_lb$est.att[5,2],
              ifelse(estimate == "High % of DE Residents", ife_para_hde$est.att[5,2], 
              ifelse(estimate == "Low % of DE Residents", ife_para_lde$est.att[5,2], 
              ifelse(estimate == "High % of Retired Residents", ife_para_hret$est.att[5,2], 
              ifelse(estimate == "Low % of Retired Residents", ife_para_lret$est.att[5,2], 
              ifelse(estimate == "High % of Unemployed Residents", ife_para_hemp$est.att[5,2], 
              ifelse(estimate == "Low % of Unemployed Residents", ife_para_lemp$est.att[5,2], 
              ifelse(estimate == "High % of Sick or Disabled Residents", ife_para_hdis$est.att[5,2], 
              ifelse(estimate == "Low % of Sick or Disabled Residents", ife_para_ldis$est.att[5,2], NA)))))))))))

late_estimates_para$estimate <- factor(late_estimates_para$estimate, 
                                       levels=unique(late_estimates_para$estimate))

late_mods <- ggplot(late_estimates_para)+
  geom_errorbar(aes(ymin =  point - 1.96*se,
                    ymax =  point + 1.96*se,
                    x = estimate), colour = '#F8766D',
                alpha = 0.8, width = 0.25, size = 1.5, 
                position = position_dodge(width = 0.5))+
  geom_point(aes(y = point, 
                 x = estimate), colour = '#00BFC4', alpha = 0.8, size = 2,
             position = position_dodge(width = 0.5))+
  coord_cartesian(ylim = c(-0.15, 0.05))+
  scale_y_continuous(breaks = seq(-0.2, 0.1, by = 0.02),
                     labels = seq(-0.2, 0.1, by = 0.02), 
                     'Treatment Effect')+
  scale_x_discrete('Design', 
                   guide = guide_axis(n.dodge = 1))+
  theme_minimal()+
  theme(panel.grid = element_blank(), 
        axis.text = element_text(size = 12),
        axis.text.x = element_text(angle = -90),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 12),
        axis.title = element_text(size = 12))+
  geom_hline(yintercept = 0, colour = '#F8766D', linetype = 2)+
  coord_cartesian(xlim = c(1,10))

late_mods

ggsave('plots/late_mods.png', late_mods, 
       width = 35, height = 25, units = 'cm')
